import nltk
import numpy
# Sample sentence that exercises every alternative of the tokenizer pattern.
text = "That U.S.A. poster-print costs $12.40..."

# Tokenize with a handful of regular-expression rules. The pattern is a single
# alternation; adjacent raw-string literals are concatenated at compile time so
# that each alternative can carry its own comment.
# NOTE(review): alternatives are tried left to right and `\w` also matches
# digits, so a number that is not preceded by `$` is consumed by one of the
# first two alternatives and never reaches the currency/percentage one.
patterns = (
    r"(?:\w\.)+"            # runs of "<word char>." such as U.S.A.
    r"|\w+(?:-\w+)*"        # words, optionally joined by internal hyphens
    r"|\$?\d+(?:\.\d+)?%?"  # optional $, digits, optional decimals, optional %
    r"|\.\.\."              # a literal three-dot ellipsis
)

# For the sample text this yields:
# ['That', 'U.S.A.', 'poster-print', 'costs', '$12.40', '...']
out = nltk.regexp_tokenize(text, pattern=patterns)
print(out)